package Partie3;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;

import java.io.InputStreamReader;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import Partie1.BernoulliParser;

import commun.Review;
import commun.ReviewType;


/**
 * Classifier for the unsupervised multinomial model.
 *
 * <p>Workflow, as implemented below: statistics are first gathered from three
 * input files, every review of an unlabeled file is then softly assigned to
 * the positive and negative classes, the statistics are rebuilt from those
 * soft assignments, and finally the reviews of a last file are classified.
 *
 * @author Yangkun
 */
public class MultinomialAnalyserNonSupervise {
	/**
	 * Word statistics and the per-class probability tables derived from them.
	 */
	MultinomialParserGeneric statistiques;
	/**
	 * For each classified review: its id mapped to its predicted type.
	 */
	Map<Integer, ReviewType> reviewTypes;
	/**
	 * All reviews read from the unlabeled document, keyed by review id.
	 */
	Map<Integer, Review> unLabReviews;
	/** Number of reviews whose predicted type matches their real type. */
	int nbCorrect = 0;
	/** Number of reviews whose predicted type differs from their real type. */
	int nbFault = 0;
	/** Number of mismatches where the prediction was not Positive. */
	int nbPosiAsNeg = 0;
	/** Number of mismatches where the prediction was Positive. */
	int nbNegAsPos = 0;
	/** Number of reviews read by {@link #parseReviews(String)}. */
	int nbReview = 0;
	/**
	 * Ratio of correct predictions: {@code nbCorrect / nbReview}
	 * (0.0 while no review has been read).
	 */
	double ratio;


	/**
	 * Command-line entry point.
	 *
	 * @param args at least five paths: {@code args[0..2]} are parsed into the
	 *             statistics, {@code args[3]} is the unlabeled file and
	 *             {@code args[4]} is the file whose reviews are classified
	 * @throws Exception if any file cannot be read, parsed or written
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 5) {
			// The message now states the number of arguments actually required.
			System.out.println("Erreur, il faut avoir au moins cinq paramètres.");
			return;
		}
		MultinomialAnalyserNonSupervise da = new MultinomialAnalyserNonSupervise(args, 0.55);
		da.parseReviews(args[4]);
		System.out.println("nombre review: " + da.nbReview + " nb correct: " + da.nbCorrect
				+ " nb fault: " + da.nbFault + " ratio: " + da.ratio);
		System.out.println("Positive review as negative: " + da.nbPosiAsNeg
				+ " Negative review as positive: " + da.nbNegAsPos);
		da.output("output.txt");
	}

	/**
	 * Creates the maps, gathers the initial statistics from {@code args[0..2]},
	 * computes the probabilities, then processes the unlabeled file
	 * {@code args[3]}.
	 *
	 * @param args    file paths; at least four entries are required
	 * @param lissage tuning parameter handed to {@link MultinomialParserGeneric}
	 *                (presumably a smoothing factor, confirm against that class)
	 * @throws IllegalArgumentException if {@code args} is null or has fewer
	 *                                  than four entries
	 * @throws Exception                if a file cannot be read or parsed
	 */
	public MultinomialAnalyserNonSupervise(String[] args, double lissage) throws Exception {
		if (args == null || args.length < 4) {
			throw new IllegalArgumentException(
					"at least 4 file paths are required, got " + (args == null ? 0 : args.length));
		}
		this.reviewTypes = new Hashtable<Integer, ReviewType>();
		this.statistiques = new MultinomialParserGeneric(lissage);
		this.unLabReviews = new HashMap<Integer, Review>();

		statistiques.parseFile(args[0]);
		statistiques.parseFile(args[1]);
		statistiques.parseFile(args[2]);

		statistiques.calculProbabilite();
		parseUnlabReviews(args[3]);
	}


	/**
	 * Writes the predictions to a file, one line per review, sorted by
	 * ascending review id.
	 *
	 * @param filepath output file
	 * @throws IOException if the file cannot be created or written
	 */
	public void output(String filepath) throws IOException {
		List<Integer> ids = new ArrayList<Integer>(this.reviewTypes.keySet());
		Collections.sort(ids);
		String lineSeparator = System.getProperty("line.separator");

		BufferedWriter bwTypes = new BufferedWriter(new FileWriter(filepath));
		try {
			for (Integer id : ids) {
				bwTypes.write("<review id='" + id + "' class='" + this.reviewTypes.get(id) + "'>"
						+ lineSeparator);
			}
			bwTypes.flush();
		} finally {
			// Closing the buffered writer also closes the underlying FileWriter,
			// including when a write failed.
			bwTypes.close();
		}
	}

	/**
	 * Computes, for an unlabeled review, its probability of being positive and
	 * its probability of being negative, stores both on the review and records
	 * the review in {@link #unLabReviews}.
	 *
	 * @param review the review to evaluate
	 * @throws Exception kept for interface compatibility
	 */
	public void PreDecideReview(Review review) throws Exception {
		double[] logs = logLikelihoods(review);
		double logPropaPositive = logs[0];
		double logPropaNegative = logs[1];
		/*
		 * Let l1 = log(b1) and l2 = log(b2) be the two log scores, and
		 * p1 + p2 = 1 the normalised probabilities. Then
		 *   p2 = b2 / (b1 + b2) = 1.0 / (1.0 + exp(l1 - l2)).
		 * The branch is chosen so that the exponent is never negative: the
		 * smaller probability is computed directly and the other one is
		 * obtained as its complement.
		 */
		if (logPropaNegative > logPropaPositive) {
			review.propaPositive = 1.0 / (1.0 + Math.exp(logPropaNegative - logPropaPositive));
			review.propaNegative = 1 - review.propaPositive;
		} else {
			review.propaNegative = 1.0 / (1.0 + Math.exp(logPropaPositive - logPropaNegative));
			review.propaPositive = 1 - review.propaNegative;
		}
		unLabReviews.put(review.id, review);
	}

	/**
	 * Resets the statistics and rebuilds them from the unlabeled reviews, each
	 * word count being weighted by the review's positive / negative
	 * probability; the probability tables are then recomputed.
	 */
	public void updateStatistiques() {
		this.statistiques.init();
		for (Review review : this.unLabReviews.values()) {
			this.statistiques.nbWordsNegatif += review.nbWords * review.propaNegative;
			this.statistiques.nbWordsPositif += review.nbWords * review.propaPositive;
			for (Object rawEntry : review.wordsMap.entrySet()) {
				Map.Entry<?, ?> wordEntry = (Map.Entry<?, ?>) rawEntry;
				String word = (String) wordEntry.getKey();
				double nbwords = (Double) wordEntry.getValue();

				// Accumulate the count weighted by the positive probability.
				double positiveCount = nbwords * review.propaPositive;
				if (this.statistiques.Positifwords.containsKey(word)) {
					positiveCount = this.statistiques.Positifwords.get(word) + positiveCount;
				}
				this.statistiques.Positifwords.put(word, positiveCount);

				// Accumulate the count weighted by the negative probability.
				double negativeCount = nbwords * review.propaNegative;
				if (this.statistiques.Negatifwords.containsKey(word)) {
					negativeCount = this.statistiques.Negatifwords.get(word) + negativeCount;
				}
				this.statistiques.Negatifwords.put(word, negativeCount);
			}
		}
		this.statistiques.calculProbabilite();
	}

	/**
	 * Predicts the type of a review by comparing its positive and negative
	 * log scores; ties are classified as Negative.
	 *
	 * @param review the review to classify
	 * @return the predicted type
	 * @throws Exception kept for interface compatibility
	 */
	public ReviewType decideReview(Review review) throws Exception {
		double[] logs = logLikelihoods(review);
		if (logs[0] > logs[1]) {
			return ReviewType.Positive;
		}
		return ReviewType.Negative;
	}

	/**
	 * Sums, over the words of a review, {@code count * log(probability)} for
	 * the positive and for the negative table. A word absent from a table
	 * contributes nothing to that table's sum.
	 *
	 * @param review the review to score
	 * @return a two-element array: {positive log score, negative log score}
	 */
	private double[] logLikelihoods(Review review) {
		double logPositive = 0;
		double logNegative = 0;
		for (Object rawEntry : review.wordsMap.entrySet()) {
			Map.Entry<?, ?> entry = (Map.Entry<?, ?>) rawEntry;
			String word = (String) entry.getKey();
			double count = (Double) entry.getValue();
			if (this.statistiques.PositiveTable.containsKey(word)) {
				logPositive += count * Math.log(this.statistiques.PositiveTable.get(word));
			}
			if (this.statistiques.NegativeTable.containsKey(word)) {
				logNegative += count * Math.log(this.statistiques.NegativeTable.get(word));
			}
		}
		return new double[] { logPositive, logNegative };
	}


	/**
	 * Parses and classifies every review of a file, recording each prediction
	 * in {@link #reviewTypes} and updating the counters and {@link #ratio}.
	 * Reviews whose real type is Unknown are counted in {@link #nbReview} but
	 * in neither {@link #nbCorrect} nor {@link #nbFault}.
	 *
	 * @param filepath location of the file to classify
	 * @throws Exception if the file cannot be read or a review cannot be parsed
	 */
	public void parseReviews(String filepath) throws Exception {
		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filepath)));
		try {
			String strLine;
			while ((strLine = br.readLine()) != null) {
				if (strLine.contains("<review") && strLine.contains("id")) {
					Review unReview = BernoulliParser.parseUnReview(br, strLine);
					this.nbReview++;
					ReviewType typePronostic = decideReview(unReview);
					this.reviewTypes.put(unReview.id, typePronostic);
					if (unReview.realType != commun.ReviewType.Unknown) {
						if (typePronostic.compareTo(unReview.realType) == 0) {
							this.nbCorrect++;
						} else {
							if (typePronostic.equals(ReviewType.Positive)) {
								this.nbNegAsPos++;
							} else {
								this.nbPosiAsNeg++;
							}
							this.nbFault++;
						}
					}
				}
			}
			// Guard against 0/0, which would otherwise yield NaN on an empty file.
			this.ratio = this.nbReview == 0 ? 0.0 : (double) this.nbCorrect / this.nbReview;
		} finally {
			// Closing the reader closes the whole stream chain, even on failure.
			br.close();
		}
	}


	/**
	 * Parses every review of the unlabeled file, computes its class
	 * probabilities, then rebuilds the statistics from those reviews.
	 *
	 * @param ublabfilepath location of the unlabeled file
	 * @throws Exception if the file cannot be read or a review cannot be parsed
	 */
	public void parseUnlabReviews(String ublabfilepath) throws Exception {
		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(ublabfilepath)));
		try {
			String strLine;
			while ((strLine = br.readLine()) != null) {
				if (strLine.contains("<review") && strLine.contains("id")) {
					Review unReview = BernoulliParser.parseUnReview(br, strLine);
					PreDecideReview(unReview);
				}
			}
			updateStatistiques();
		} finally {
			// Closing the reader closes the whole stream chain, even on failure.
			br.close();
		}
	}

}
